Analysis on Reinfection of Sexually Transmitted Diseases

Yichu Chen, Gu Gong, Kate Jones, Gianni Spiga

2022-11-29

The Data

data(std)
std

Exploratory Data Analysis

Checking Proportionality

Kaplan-Meier Curves

## Call:
## survdiff(formula = surv_object ~ iinfct, data = std)
## 
##                    N Observed Expected (O-E)^2/E (O-E)^2/V
## iinfct=gonorrhea 140       73     54.5   6.28042   7.50617
## iinfct=chlamydia 396      135    153.0   2.12201   3.81136
## iinfct=both      341      139    139.5   0.00166   0.00278
## 
##  Chisq= 8.5  on 2 degrees of freedom, p= 0.01

Hazard Ratios

Cumulative Hazards and CLogLog

## Call:
## coxph(formula = surv_object ~ iinfct, data = std)
## 
##   n= 877, number of events= 347 
## 
##                    coef exp(coef) se(coef)      z Pr(>|z|)   
## iinfctchlamydia -0.4202    0.6569   0.1457 -2.884  0.00393 **
## iinfctboth      -0.2980    0.7423   0.1450 -2.055  0.03984 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.6569      1.522    0.4937    0.8741
## iinfctboth         0.7423      1.347    0.5587    0.9863
## 
## Concordance= 0.524  (se = 0.016 )
## Likelihood ratio test= 7.93  on 2 df,   p=0.02
## Wald test            = 8.37  on 2 df,   p=0.02
## Score (logrank) test = 8.46  on 2 df,   p=0.01

- The cox.zph function was then applied to the model which resulted in a p-value of 0.24 reinforcing that the proportional hazards assumption is maintained. Therefore, we chose to apply the Cox model which is elaborated on the next slide.

Finding the Best Model

The Full model

cox1 <-
  coxph(
    surv_object ~ iinfct + marital + race + os12m + os30d +
      rs12m + rs30d + abdpain + discharge + dysuria + condom + 
      itch + lesion + rash + lymph + vagina + dchexam + abnode +
      age + yschool + npartner,
    data = std
  )
summary(cox1)
## Call:
## coxph(formula = surv_object ~ iinfct + marital + race + os12m + 
##     os30d + rs12m + rs30d + abdpain + discharge + dysuria + condom + 
##     itch + lesion + rash + lymph + vagina + dchexam + abnode + 
##     age + yschool + npartner, data = std)
## 
##   n= 877, number of events= 347 
## 
##                      coef exp(coef)  se(coef)      z Pr(>|z|)   
## iinfctchlamydia -0.331853  0.717593  0.149264 -2.223  0.02620 * 
## iinfctboth      -0.263731  0.768180  0.149262 -1.767  0.07724 . 
## maritalMarried   0.054707  1.056231  0.431282  0.127  0.89906   
## maritalSingle    0.408462  1.504502  0.295237  1.384  0.16651   
## raceWhite       -0.112104  0.893951  0.141248 -0.794  0.42739   
## os12m1          -0.205985  0.813845  0.212025 -0.972  0.33129   
## os30d1          -0.337675  0.713427  0.238454 -1.416  0.15675   
## rs12m1           0.036907  1.037596  0.444944  0.083  0.93389   
## rs30d1          -0.194243  0.823458  0.565166 -0.344  0.73108   
## abdpain1         0.233865  1.263474  0.155288  1.506  0.13207   
## discharge1       0.113143  1.119792  0.114148  0.991  0.32159   
## dysuria1         0.162934  1.176960  0.155112  1.050  0.29352   
## condomnever     -0.263793  0.768133  0.119652 -2.205  0.02748 * 
## itch1           -0.149871  0.860819  0.154492 -0.970  0.33200   
## lesion1         -0.188159  0.828483  0.333106 -0.565  0.57217   
## rash1            0.003932  1.003940  0.392693  0.010  0.99201   
## lymph1          -0.030839  0.969632  0.547483 -0.056  0.95508   
## vagina1          0.349257  1.418013  0.174697  1.999  0.04559 * 
## dchexam1        -0.465508  0.627816  0.229914 -2.025  0.04290 * 
## abnode1          0.193753  1.213797  0.425224  0.456  0.64864   
## age              0.007855  1.007886  0.013891  0.565  0.57176   
## yschool         -0.127454  0.880334  0.039309 -3.242  0.00119 **
## npartner         0.076185  1.079162  0.054067  1.409  0.15881   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.7176     1.3935    0.5356    0.9615
## iinfctboth         0.7682     1.3018    0.5733    1.0292
## maritalMarried     1.0562     0.9468    0.4536    2.4596
## maritalSingle      1.5045     0.6647    0.8435    2.6835
## raceWhite          0.8940     1.1186    0.6778    1.1791
## os12m1             0.8138     1.2287    0.5371    1.2332
## os30d1             0.7134     1.4017    0.4471    1.1385
## rs12m1             1.0376     0.9638    0.4338    2.4818
## rs30d1             0.8235     1.2144    0.2720    2.4929
## abdpain1           1.2635     0.7915    0.9319    1.7130
## discharge1         1.1198     0.8930    0.8953    1.4006
## dysuria1           1.1770     0.8496    0.8684    1.5951
## condomnever        0.7681     1.3019    0.6076    0.9711
## itch1              0.8608     1.1617    0.6359    1.1652
## lesion1            0.8285     1.2070    0.4313    1.5916
## rash1              1.0039     0.9961    0.4650    2.1675
## lymph1             0.9696     1.0313    0.3316    2.8355
## vagina1            1.4180     0.7052    1.0069    1.9970
## dchexam1           0.6278     1.5928    0.4001    0.9852
## abnode1            1.2138     0.8239    0.5275    2.7932
## age                1.0079     0.9922    0.9808    1.0357
## yschool            0.8803     1.1359    0.8151    0.9508
## npartner           1.0792     0.9266    0.9707    1.1998
## 
## Concordance= 0.634  (se = 0.016 )
## Likelihood ratio test= 73.29  on 23 df,   p=4e-07
## Wald test            = 69.84  on 23 df,   p=1e-06
## Score (logrank) test = 71.68  on 23 df,   p=7e-07

Simplifying the Model

Residuals

Martingale

Transformation of Quantitative to a Categorical Variable

## Call:
## coxph(formula = surv_object ~ iinfct + condom + vagina + dchexam + 
##     yschool, data = std)
## 
##   n= 877, number of events= 347 
## 
##                     coef exp(coef) se(coef)      z Pr(>|z|)    
## iinfctchlamydia -0.38468   0.68067  0.14618 -2.632  0.00850 ** 
## iinfctboth      -0.24681   0.78129  0.14561 -1.695  0.09006 .  
## condomnever     -0.29779   0.74245  0.11551 -2.578  0.00994 ** 
## vagina1          0.40660   1.50171  0.16739  2.429  0.01514 *  
## dchexam1        -0.37042   0.69044  0.22123 -1.674  0.09405 .  
## yschool         -0.14357   0.86626  0.03332 -4.308 1.64e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.6807     1.4692    0.5111    0.9065
## iinfctboth         0.7813     1.2799    0.5873    1.0393
## condomnever        0.7425     1.3469    0.5920    0.9311
## vagina1            1.5017     0.6659    1.0817    2.0848
## dchexam1           0.6904     1.4483    0.4475    1.0652
## yschool            0.8663     1.1544    0.8115    0.9247
## 
## Concordance= 0.603  (se = 0.017 )
## Likelihood ratio test= 45.32  on 6 df,   p=4e-08
## Wald test            = 46.51  on 6 df,   p=2e-08
## Score (logrank) test = 46.77  on 6 df,   p=2e-08

Schoenfield

test.ph <- cox.zph(cox.model)
test.ph
##          chisq df    p
## iinfct  3.5184  2 0.17
## condom  1.0539  1 0.30
## vagina  1.2835  1 0.26
## dchexam 0.4085  1 0.52
## yschool 0.0214  1 0.88
## GLOBAL  6.1176  6 0.41
#plot(test.ph[1], main = "Schoenfeld Residuals for differernt initial infection types")

#Condom use
ggcoxzph(test.ph[2], ggtheme =theme_minimal(),  se = FALSE, font.main = 12, point.col = "#ed7864")

#Vaginal use
ggcoxzph(test.ph[3], ggtheme =theme_minimal(),  se = FALSE, font.main = 12, point.col = "#ed7864")

#Discharge at exam use
ggcoxzph(test.ph[4], ggtheme =theme_minimal(),  se = FALSE, font.main = 12, point.col = "#ed7864")

# Years of schooling
ggcoxzph(test.ph[5], ggtheme =theme_minimal(),  se = FALSE, font.main = 12, point.col = "#ed7864")

# years of school  
figdfb1 <- ggplotly(
  ggplot() + aes(
    x = std$obs,
    y = b.dfb[, 6],
    text = paste("Obs:", std$obs, "\ndfBeta:", round(b.dfb[, 4], 4))
  ) + geom_point() + labs(x = "Observation Number", y = "Years of School (Categorical)", title = "dfbeta Values for Years of School"),
  tooltip = "text"
)

# Discharge at Exam
figdfb2 <- ggplotly(
  ggplot() + aes(
    x = std$obs,
    y = b.dfb[, 5],
    text = paste("Obs:", std$obs, "\ndfBeta:", round(b.dfb[, 5], 4))
  ) + geom_point() + labs(x = "Observation Number", y = "Discharge at Exam", title = "dfbeta Values for Discharge at Exam"),
  tooltip = "text"
)

# Vaginal Involvement
figdfb3 <- ggplotly(
  ggplot() + aes(
    x = std$obs,
    y = b.dfb[, 4],
    text = paste("Obs:", std$obs, "\ndfBeta:", round(b.dfb[, 6], 4))
  ) + geom_point() + labs(x = "Observation Number", y = "Vaginal Involvement at Exam", title = "dfbeta Values for Vaginal Involvement at Exam"),
  tooltip = "text"
)


# Condom
figdfb4 <- ggplotly(
  ggplot() + aes(
    x = std$obs,
    y = b.dfb[, 3],
    text = paste("Obs:", std$obs, "\ndfBeta:", round(b.dfb[, 6], 4))
  ) + geom_point() + labs(x = "Observation Number", y = "Condom", title = "dfbeta Values for Condom Usage"),
  tooltip = "text"
)


fig <- subplot(
  figdfb1,
  figdfb2,
  figdfb3,
  figdfb4,
  nrows = 2,
  shareX = TRUE,
  shareY = TRUE
) %>% layout(title = "dfBeta values for Years of Schooling, Discharge at Exam, Vaginal Involvement, \nand Condom Usage")

# Update title
annotations = list( 
  list( 
    x = 0.2,  
    y = 1.0,  
    text = "Years of Schooling",  
    xref = "paper",  
    yref = "paper",  
    xanchor = "center",  
    yanchor = "bottom",  
    showarrow = FALSE 
  ),  
  list( 
    x = 0.8,  
    y = 1,  
    text = "Discharge at Exam",  
    xref = "paper",  
    yref = "paper",  
    xanchor = "center",  
    yanchor = "bottom",  
    showarrow = FALSE 
  ),  
  list( 
    x = 0.2,  
    y = 0.475,  
    text = "Vaginal Involvement",  
    xref = "paper",  
    yref = "paper",  
    xanchor = "center",  
    yanchor = "bottom",  
    showarrow = FALSE 
  ),
  list( 
    x = 0.8,  
    y = 0.475,  
    text = "Condom Usage",  
    xref = "paper",  
    yref = "paper",  
    xanchor = "center",  
    yanchor = "bottom",  
    showarrow = FALSE 
  ))

fig <- fig %>%layout(annotations = annotations) 
fig

dfBeta

Outliers

In Conclusion

Our Best Model

## Call:
## coxph(formula = surv_object ~ iinfct + condom + vagina + dchexam + 
##     yschool, data = std)
## 
##   n= 877, number of events= 347 
## 
##                     coef exp(coef) se(coef)      z Pr(>|z|)    
## iinfctchlamydia -0.38468   0.68067  0.14618 -2.632  0.00850 ** 
## iinfctboth      -0.24681   0.78129  0.14561 -1.695  0.09006 .  
## condomnever     -0.29779   0.74245  0.11551 -2.578  0.00994 ** 
## vagina1          0.40660   1.50171  0.16739  2.429  0.01514 *  
## dchexam1        -0.37042   0.69044  0.22123 -1.674  0.09405 .  
## yschool         -0.14357   0.86626  0.03332 -4.308 1.64e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.6807     1.4692    0.5111    0.9065
## iinfctboth         0.7813     1.2799    0.5873    1.0393
## condomnever        0.7425     1.3469    0.5920    0.9311
## vagina1            1.5017     0.6659    1.0817    2.0848
## dchexam1           0.6904     1.4483    0.4475    1.0652
## yschool            0.8663     1.1544    0.8115    0.9247
## 
## Concordance= 0.603  (se = 0.017 )
## Likelihood ratio test= 45.32  on 6 df,   p=4e-08
## Wald test            = 46.51  on 6 df,   p=2e-08
## Score (logrank) test = 46.77  on 6 df,   p=2e-08